//! Discover Python files, and their corresponding [`Settings`], from the
//! filesystem.

use std::cmp::Ordering;
use std::collections::BTreeSet;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::sync::RwLock;

use anyhow::{Context, Result};
use anyhow::{anyhow, bail};
use globset::{Candidate, GlobSet};
use ignore::{DirEntry, Error, ParallelVisitor, WalkBuilder, WalkState};
use itertools::Itertools;
use log::debug;
use matchit::{InsertError, Match, Router};
use path_absolutize::path_dedot;
use path_slash::PathExt;
use rustc_hash::{FxHashMap, FxHashSet};

use ruff_linter::fs;
use ruff_linter::package::PackageRoot;
use ruff_linter::packaging::is_package;

use crate::configuration::Configuration;
use crate::pyproject::{TargetVersionStrategy, settings_toml};
use crate::settings::Settings;
use crate::{FileResolverSettings, pyproject};

/// The configuration information from a `pyproject.toml` file.
#[derive(Debug)]
pub struct PyprojectConfig {
    /// The strategy used to discover the relevant `pyproject.toml` file for
    /// each Python file.
    pub strategy: PyprojectDiscoveryStrategy,
    /// All settings from the `pyproject.toml` file.
    pub settings: Settings,
    /// Absolute path to the `pyproject.toml` file. This would be `None` when
    /// either using the default settings or the `--isolated` flag is set.
    pub path: Option<PathBuf>,
}

impl PyprojectConfig {
    pub fn new(
        strategy: PyprojectDiscoveryStrategy,
        settings: Settings,
        path: Option<PathBuf>,
    ) -> Self {
        Self {
            strategy,
            settings,
            path: path.map(fs::normalize_path),
        }
    }
}

/// The strategy used to discover the relevant `pyproject.toml` file for each
/// Python file.
#[derive(Debug, Copy, Clone)]
pub enum PyprojectDiscoveryStrategy {
    /// Use a fixed `pyproject.toml` file for all Python files (i.e., one
    /// provided on the command-line).
    Fixed,
    /// Use the closest `pyproject.toml` file in the filesystem hierarchy, or
    /// the default settings.
    Hierarchical,
}

impl PyprojectDiscoveryStrategy {
    #[inline]
    pub const fn is_fixed(self) -> bool {
        matches!(self, PyprojectDiscoveryStrategy::Fixed)
    }

    #[inline]
    pub const fn is_hierarchical(self) -> bool {
        matches!(self, PyprojectDiscoveryStrategy::Hierarchical)
    }
}

/// The strategy for resolving file paths in a `pyproject.toml`.
#[derive(Copy, Clone)]
pub enum Relativity {
    /// Resolve file paths relative to the current working directory.
    Cwd,
    /// Resolve file paths relative to the directory containing the
    /// `pyproject.toml`.
    Parent,
}

impl Relativity {
    pub fn resolve(self, path: &Path) -> &Path {
        match self {
            Relativity::Parent => path
                .parent()
                .expect("Expected pyproject.toml file to be in parent directory"),
            Relativity::Cwd => &path_dedot::CWD,
        }
    }
}

#[derive(Debug)]
pub struct Resolver<'a> {
    pyproject_config: &'a PyprojectConfig,
    /// All [`Settings`] that have been added to the resolver.
    settings: Vec<Settings>,
    /// A router from path to index into the `settings` vector.
    router: Router<usize>,
}

impl<'a> Resolver<'a> {
    /// Create a new [`Resolver`] for the given [`PyprojectConfig`].
    pub fn new(pyproject_config: &'a PyprojectConfig) -> Self {
        Self {
            pyproject_config,
            settings: Vec::new(),
            router: Router::new(),
        }
    }

    /// Return the [`Settings`] from the [`PyprojectConfig`].
    #[inline]
    pub fn base_settings(&self) -> &Settings {
        &self.pyproject_config.settings
    }

    /// Return `true` if the [`Resolver`] is using a hierarchical discovery strategy.
    #[inline]
    pub fn is_hierarchical(&self) -> bool {
        self.pyproject_config.strategy.is_hierarchical()
    }

    /// Return `true` if the [`Resolver`] should force-exclude files passed directly to the CLI.
    #[inline]
    pub fn force_exclude(&self) -> bool {
        self.pyproject_config.settings.file_resolver.force_exclude
    }

    /// Return `true` if the [`Resolver`] should respect `.gitignore` files.
    #[inline]
    pub fn respect_gitignore(&self) -> bool {
        self.pyproject_config
            .settings
            .file_resolver
            .respect_gitignore
    }

    /// Add a resolved [`Settings`] under a given [`PathBuf`] scope.
    fn add(&mut self, path: &Path, settings: Settings) {
        self.settings.push(settings);

        // Normalize the path to use `/` separators and escape the '{' and '}' characters,
        // which matchit uses for routing parameters.
        let path = path.to_slash_lossy().replace('{', "{{").replace('}', "}}");

        match self
            .router
            .insert(format!("{path}/{{*filepath}}"), self.settings.len() - 1)
        {
            Ok(()) => {}
            Err(InsertError::Conflict { .. }) => {
                return;
            }
            Err(_) => unreachable!("file paths are escaped before being inserted in the router"),
        }

        // Insert a mapping that matches the directory itself (without a trailing slash).
        // Inserting should always succeed because conflicts are resolved above and the above insertion guarantees
        // that the path is correctly escaped.
        self.router.insert(path, self.settings.len() - 1).unwrap();
    }

    /// Return the appropriate [`Settings`] for a given [`Path`].
    pub fn resolve(&self, path: &Path) -> &Settings {
        match self.pyproject_config.strategy {
            PyprojectDiscoveryStrategy::Fixed => &self.pyproject_config.settings,
            PyprojectDiscoveryStrategy::Hierarchical => self
                .router
                .at(path.to_slash_lossy().as_ref())
                .map(|Match { value, .. }| &self.settings[*value])
                .unwrap_or(&self.pyproject_config.settings),
        }
    }

    /// Return a mapping from Python package to its package root.
    pub fn package_roots(
        &'a self,
        files: &[&'a Path],
    ) -> FxHashMap<&'a Path, Option<PackageRoot<'a>>> {
        // Pre-populate the module cache, since the list of files could (but isn't
        // required to) contain some `__init__.py` files.
        let mut package_cache: FxHashMap<&Path, bool> = FxHashMap::default();
        for file in files {
            if file.ends_with("__init__.py") {
                if let Some(parent) = file.parent() {
                    package_cache.insert(parent, true);
                }
            }
        }

        // Determine whether any of the settings require namespace packages. If not, we can save
        // a lookup for every file.
        let has_namespace_packages = self
            .settings()
            .any(|settings| !settings.linter.namespace_packages.is_empty());

        // Search for the package root for each file.
        let mut package_roots: FxHashMap<&Path, Option<PackageRoot<'_>>> = FxHashMap::default();
        for file in files {
            if let Some(package) = file.parent() {
                package_roots.entry(package).or_insert_with(|| {
                    let namespace_packages = if has_namespace_packages {
                        self.resolve(file).linter.namespace_packages.as_slice()
                    } else {
                        &[]
                    };
                    detect_package_root_with_cache(package, namespace_packages, &mut package_cache)
                        .map(|path| PackageRoot::Root { path })
                });
            }
        }

        // Discard any nested roots.
        //
        // For example, if `./foo/__init__.py` is a root, and then `./foo/bar` is empty, and
        // `./foo/bar/baz/__init__.py` was detected as a root, we should only consider
        // `./foo/__init__.py`.
        let mut non_roots = FxHashSet::default();
        let mut router: Router<&Path> = Router::new();
        for root in package_roots
            .values()
            .flatten()
            .copied()
            .map(PackageRoot::path)
            .collect::<BTreeSet<_>>()
        {
            // Normalize the path to use `/` separators and escape the '{' and '}' characters,
            // which matchit uses for routing parameters.
            let path = root.to_slash_lossy().replace('{', "{{").replace('}', "}}");
            if let Ok(matched) = router.at_mut(&path) {
                debug!(
                    "Ignoring nested package root: {} (under {})",
                    root.display(),
                    matched.value.display()
                );
                package_roots.insert(root, Some(PackageRoot::nested(root)));
                non_roots.insert(root);
            } else {
                let _ = router.insert(format!("{path}/{{*filepath}}"), root);
            }
        }

        package_roots
    }

    /// Return an iterator over the resolved [`Settings`] in this [`Resolver`].
    pub fn settings(&self) -> impl Iterator<Item = &Settings> {
        std::iter::once(&self.pyproject_config.settings).chain(&self.settings)
    }
}

/// A wrapper around `detect_package_root` to cache filesystem lookups.
fn detect_package_root_with_cache<'a>(
    path: &'a Path,
    namespace_packages: &[PathBuf],
    package_cache: &mut FxHashMap<&'a Path, bool>,
) -> Option<&'a Path> {
    let mut current = None;
    for parent in path.ancestors() {
        if !is_package_with_cache(parent, namespace_packages, package_cache) {
            return current;
        }
        current = Some(parent);
    }
    current
}

/// A wrapper around `is_package` to cache filesystem lookups.
fn is_package_with_cache<'a>(
    path: &'a Path,
    namespace_packages: &[PathBuf],
    package_cache: &mut FxHashMap<&'a Path, bool>,
) -> bool {
    *package_cache
        .entry(path)
        .or_insert_with(|| is_package(path, namespace_packages))
}

/// Applies a transformation to a [`Configuration`].
///
/// Used to override options with the values provided by the CLI.
pub trait ConfigurationTransformer {
    fn transform(&self, config: Configuration) -> Configuration;
}

/// Recursively resolve a [`Configuration`] from a `pyproject.toml` file at the
/// specified [`Path`].
// TODO(charlie): This whole system could do with some caching. Right now, if a
// configuration file extends another in the same path, we'll re-parse the same
// file at least twice (possibly more than twice, since we'll also parse it when
// resolving the "default" configuration).
pub fn resolve_configuration(
    pyproject: &Path,
    transformer: &dyn ConfigurationTransformer,
    origin: ConfigurationOrigin,
) -> Result<Configuration> {
    let relativity = Relativity::from(origin);
    let mut configurations = indexmap::IndexMap::new();
    let mut next = Some(fs::normalize_path(pyproject));
    while let Some(path) = next {
        if configurations.contains_key(&path) {
            bail!(format!(
                "Circular configuration detected: {chain}",
                chain = configurations
                    .keys()
                    .chain([&path])
                    .map(|p| format!("`{}`", p.display()))
                    .join(" extends "),
            ));
        }

        // Resolve the current path.
        let version_strategy =
            if configurations.is_empty() && matches!(origin, ConfigurationOrigin::Ancestor) {
                // For configurations that are discovered by
                // walking back from a file, we will attempt to
                // infer the `target-version` if it is missing
                TargetVersionStrategy::RequiresPythonFallback
            } else {
                // In all other cases (e.g. for configurations
                // inherited via `extend`, or user-level settings)
                // we do not attempt to infer a missing `target-version`
                TargetVersionStrategy::UseDefault
            };
        let options = pyproject::load_options(&path, &version_strategy).with_context(|| {
            if configurations.is_empty() {
                format!(
                    "Failed to load configuration `{path}`",
                    path = path.display()
                )
            } else {
                let chain = configurations
                    .keys()
                    .chain([&path])
                    .map(|p| format!("`{}`", p.display()))
                    .join(" extends ");
                format!(
                    "Failed to load extended configuration `{path}` ({chain})",
                    path = path.display()
                )
            }
        })?;

        let project_root = relativity.resolve(&path);
        let configuration = Configuration::from_options(options, Some(&path), project_root)?;

        // If extending, continue to collect.
        next = configuration.extend.as_ref().map(|extend| {
            fs::normalize_path_to(
                extend,
                path.parent()
                    .expect("Expected pyproject.toml file to be in parent directory"),
            )
        });

        // Keep track of (1) the paths we've already resolved (to avoid cycles), and (2)
        // the base configuration for every path.
        configurations.insert(path, configuration);
    }

    // Merge the configurations, in order.
    let mut configurations = configurations.into_values();
    let mut configuration = configurations.next().unwrap();
    for extend in configurations {
        configuration = configuration.combine(extend);
    }
    Ok(transformer.transform(configuration))
}

/// Extract the project root (scope) and [`Settings`] from a given
/// `pyproject.toml`.
fn resolve_scoped_settings<'a>(
    pyproject: &'a Path,
    transformer: &dyn ConfigurationTransformer,
    origin: ConfigurationOrigin,
) -> Result<(&'a Path, Settings)> {
    let relativity = Relativity::from(origin);

    let configuration = resolve_configuration(pyproject, transformer, origin)?;
    let project_root = relativity.resolve(pyproject);
    let settings = configuration.into_settings(project_root)?;
    Ok((project_root, settings))
}

/// Extract the [`Settings`] from a given `pyproject.toml` and process the
/// configuration with the given [`ConfigurationTransformer`].
pub fn resolve_root_settings(
    pyproject: &Path,
    transformer: &dyn ConfigurationTransformer,
    origin: ConfigurationOrigin,
) -> Result<Settings> {
    let (_project_root, settings) = resolve_scoped_settings(pyproject, transformer, origin)?;
    Ok(settings)
}

#[derive(Debug, Clone, Copy)]
/// How the configuration is provided.
pub enum ConfigurationOrigin {
    /// Origin is unknown to the caller
    Unknown,
    /// User specified path to specific configuration file
    UserSpecified,
    /// User-level configuration (e.g. in `~/.config/ruff/pyproject.toml`)
    UserSettings,
    /// In parent or higher ancestor directory of path
    Ancestor,
}

impl From<ConfigurationOrigin> for Relativity {
    fn from(value: ConfigurationOrigin) -> Self {
        match value {
            ConfigurationOrigin::Unknown => Self::Parent,
            ConfigurationOrigin::UserSpecified => Self::Cwd,
            ConfigurationOrigin::UserSettings => Self::Cwd,
            ConfigurationOrigin::Ancestor => Self::Parent,
        }
    }
}

/// Find all Python (`.py`, `.pyi`, `.pyw`, and `.ipynb` files) in a set of paths.
pub fn python_files_in_path<'a>(
    paths: &[PathBuf],
    pyproject_config: &'a PyprojectConfig,
    transformer: &(dyn ConfigurationTransformer + Sync),
) -> Result<(Vec<Result<ResolvedFile, ignore::Error>>, Resolver<'a>)> {
    // Normalize every path (e.g., convert from relative to absolute).
    let mut paths: Vec<PathBuf> = paths.iter().map(fs::normalize_path).unique().collect();

    // Search for `pyproject.toml` files in all parent directories.
    let mut resolver = Resolver::new(pyproject_config);
    let mut seen = FxHashSet::default();

    // Insert the path to the root configuration to avoid parsing the configuration a second time.
    if let Some(config_path) = &pyproject_config.path {
        seen.insert(config_path.parent().unwrap());
    }

    if resolver.is_hierarchical() {
        for path in &paths {
            for ancestor in path.ancestors() {
                if seen.insert(ancestor) {
                    if let Some(pyproject) = settings_toml(ancestor)? {
                        let (root, settings) = resolve_scoped_settings(
                            &pyproject,
                            transformer,
                            ConfigurationOrigin::Ancestor,
                        )?;
                        resolver.add(root, settings);
                        // We found the closest configuration.
                        break;
                    }
                } else {
                    // We already visited this ancestor, we can stop here.
                    break;
                }
            }
        }
    }

    // Check if the paths themselves are excluded.
    if resolver.force_exclude() {
        paths.retain(|path| !is_file_excluded(path, &resolver));
        if paths.is_empty() {
            return Ok((vec![], resolver));
        }
    }

    let (first_path, rest_paths) = paths
        .split_first()
        .ok_or_else(|| anyhow!("Expected at least one path to search for Python files"))?;
    // Create the `WalkBuilder`.
    let mut builder = WalkBuilder::new(first_path);

    if let Ok(cwd) = std::env::current_dir() {
        builder.current_dir(cwd);
    }

    for path in rest_paths {
        builder.add(path);
    }
    builder.standard_filters(resolver.respect_gitignore());
    builder.hidden(false);

    builder.threads(
        std::thread::available_parallelism()
            .map_or(1, std::num::NonZeroUsize::get)
            .min(12),
    );

    let walker = builder.build_parallel();

    // Run the `WalkParallel` to collect all Python files.
    let state = WalkPythonFilesState::new(resolver);
    let mut visitor = PythonFilesVisitorBuilder::new(transformer, &state);
    walker.visit(&mut visitor);

    state.finish()
}

type ResolvedFiles = Vec<Result<ResolvedFile, ignore::Error>>;

struct WalkPythonFilesState<'config> {
    is_hierarchical: bool,
    merged: std::sync::Mutex<(ResolvedFiles, Result<()>)>,
    resolver: RwLock<Resolver<'config>>,
}

impl<'config> WalkPythonFilesState<'config> {
    fn new(resolver: Resolver<'config>) -> Self {
        Self {
            is_hierarchical: resolver.is_hierarchical(),
            merged: std::sync::Mutex::new((Vec::new(), Ok(()))),
            resolver: RwLock::new(resolver),
        }
    }

    fn finish(self) -> Result<(Vec<Result<ResolvedFile, ignore::Error>>, Resolver<'config>)> {
        let (files, error) = self.merged.into_inner().unwrap();
        error?;

        let deduplicated_files = deduplicate_files(files);

        Ok((deduplicated_files, self.resolver.into_inner().unwrap()))
    }
}

/// Deduplicate files by path, prioritizing `Root` files over `Nested` files.
///
/// When the same path appears both as a directly specified input (`Root`)
/// and via directory traversal (`Nested`), keep the `Root` entry and drop
/// the `Nested` entry.
///
/// Dropping the root entry means that the explicitly passed path may be
/// unintentionally ignored, since it is treated as nested and can be excluded
/// despite being requested.
///
/// Concretely, with `lint.exclude = ["foo.py"]` and `ruff check . foo.py`,
/// we must keep `Root(foo.py)` and drop `Nested(foo.py)` so `foo.py` is
/// linted as the user requested.
fn deduplicate_files(mut files: ResolvedFiles) -> ResolvedFiles {
    // Sort by path; for identical paths, prefer Root over Nested; place errors after files
    files.sort_by(|a, b| match (a, b) {
        (Ok(a_file), Ok(b_file)) => a_file.cmp(b_file),
        (Ok(_), Err(_)) => Ordering::Less,
        (Err(_), Ok(_)) => Ordering::Greater,
        (Err(_), Err(_)) => Ordering::Equal,
    });

    files.dedup_by(|a, b| match (a, b) {
        (Ok(a_file), Ok(b_file)) => a_file.path() == b_file.path(),
        _ => false,
    });

    files
}

struct PythonFilesVisitorBuilder<'s, 'config> {
    state: &'s WalkPythonFilesState<'config>,
    transformer: &'s (dyn ConfigurationTransformer + Sync),
}

impl<'s, 'config> PythonFilesVisitorBuilder<'s, 'config> {
    fn new(
        transformer: &'s (dyn ConfigurationTransformer + Sync),
        state: &'s WalkPythonFilesState<'config>,
    ) -> Self {
        Self { state, transformer }
    }
}

struct PythonFilesVisitor<'s, 'config> {
    local_files: Vec<Result<ResolvedFile, ignore::Error>>,
    local_error: Result<()>,
    global: &'s WalkPythonFilesState<'config>,
    transformer: &'s (dyn ConfigurationTransformer + Sync),
}

impl<'config, 's> ignore::ParallelVisitorBuilder<'s> for PythonFilesVisitorBuilder<'s, 'config>
where
    'config: 's,
{
    fn build(&mut self) -> Box<dyn ignore::ParallelVisitor + 's> {
        Box::new(PythonFilesVisitor {
            local_files: vec![],
            local_error: Ok(()),
            global: self.state,
            transformer: self.transformer,
        })
    }
}

impl ParallelVisitor for PythonFilesVisitor<'_, '_> {
    fn visit(&mut self, result: std::result::Result<DirEntry, Error>) -> WalkState {
        // Respect our own exclusion behavior.
        if let Ok(entry) = &result {
            if entry.depth() > 0 {
                let path = entry.path();
                let resolver = self.global.resolver.read().unwrap();
                let settings = resolver.resolve(path);
                if let Some(file_name) = path.file_name() {
                    let file_path = Candidate::new(path);
                    let file_basename = Candidate::new(file_name);
                    if match_candidate_exclusion(
                        &file_path,
                        &file_basename,
                        &settings.file_resolver.exclude,
                    ) {
                        debug!("Ignored path via `exclude`: {path:?}");
                        return WalkState::Skip;
                    } else if match_candidate_exclusion(
                        &file_path,
                        &file_basename,
                        &settings.file_resolver.extend_exclude,
                    ) {
                        debug!("Ignored path via `extend-exclude`: {path:?}");
                        return WalkState::Skip;
                    }
                } else {
                    debug!("Ignored path due to error in parsing: {path:?}");
                    return WalkState::Skip;
                }
            }
        }

        // Search for the `pyproject.toml` file in this directory, before we visit any
        // of its contents.
        if self.global.is_hierarchical {
            if let Ok(entry) = &result {
                if entry
                    .file_type()
                    .is_some_and(|file_type| file_type.is_dir())
                {
                    match settings_toml(entry.path()) {
                        Ok(Some(pyproject)) => match resolve_scoped_settings(
                            &pyproject,
                            self.transformer,
                            ConfigurationOrigin::Ancestor,
                        ) {
                            Ok((root, settings)) => {
                                self.global.resolver.write().unwrap().add(root, settings);
                            }
                            Err(err) => {
                                self.local_error = Err(err);
                                return WalkState::Quit;
                            }
                        },
                        Ok(None) => {}
                        Err(err) => {
                            self.local_error = Err(err);
                            return WalkState::Quit;
                        }
                    }
                }
            }
        }

        match result {
            Ok(entry) => {
                // Ignore directories
                let resolved = if entry.file_type().is_none_or(|ft| ft.is_dir()) {
                    None
                } else if entry.depth() == 0 {
                    // Accept all files that are passed-in directly.
                    Some(ResolvedFile::Root(entry.into_path()))
                } else {
                    // Otherwise, check if the file is included.
                    let path = entry.path();
                    let resolver = self.global.resolver.read().unwrap();
                    let settings = resolver.resolve(path);
                    if settings.file_resolver.include.is_match(path) {
                        debug!("Included path via `include`: {path:?}");
                        Some(ResolvedFile::Nested(entry.into_path()))
                    } else if settings.file_resolver.extend_include.is_match(path) {
                        debug!("Included path via `extend-include`: {path:?}");
                        Some(ResolvedFile::Nested(entry.into_path()))
                    } else {
                        None
                    }
                };

                if let Some(resolved) = resolved {
                    self.local_files.push(Ok(resolved));
                }
            }
            Err(err) => {
                self.local_files.push(Err(err));
            }
        }

        WalkState::Continue
    }
}

impl Drop for PythonFilesVisitor<'_, '_> {
    fn drop(&mut self) {
        let mut merged = self.global.merged.lock().unwrap();
        let (files, error) = &mut *merged;

        if files.is_empty() {
            *files = std::mem::take(&mut self.local_files);
        } else {
            files.append(&mut self.local_files);
        }

        let local_error = std::mem::replace(&mut self.local_error, Ok(()));
        if error.is_ok() {
            *error = local_error;
        }
    }
}

#[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd)]
pub enum ResolvedFile {
    /// File explicitly passed to the CLI
    Root(PathBuf),
    /// File in a sub-directory
    Nested(PathBuf),
}

impl ResolvedFile {
    pub fn into_path(self) -> PathBuf {
        match self {
            ResolvedFile::Root(path) => path,
            ResolvedFile::Nested(path) => path,
        }
    }

    pub fn path(&self) -> &Path {
        match self {
            ResolvedFile::Root(root) => root.as_path(),
            ResolvedFile::Nested(root) => root.as_path(),
        }
    }

    pub fn file_name(&self) -> &OsStr {
        let path = self.path();
        path.file_name().unwrap_or(path.as_os_str())
    }

    pub fn is_root(&self) -> bool {
        matches!(self, ResolvedFile::Root(_))
    }
}

/// Return `true` if the Python file at [`Path`] is _not_ excluded.
pub fn python_file_at_path(
    path: &Path,
    resolver: &mut Resolver,
    transformer: &dyn ConfigurationTransformer,
) -> Result<bool> {
    // Normalize the path (e.g., convert from relative to absolute).
    let path = fs::normalize_path(path);

    // Search for `pyproject.toml` files in all parent directories.
    if resolver.is_hierarchical() {
        for ancestor in path.ancestors() {
            if let Some(pyproject) = settings_toml(ancestor)? {
                let (root, settings) =
                    resolve_scoped_settings(&pyproject, transformer, ConfigurationOrigin::Unknown)?;
                resolver.add(root, settings);
                break;
            }
        }
    }

    // Check exclusions.
    Ok(!is_file_excluded(&path, resolver))
}

/// Return `true` if the given top-level [`Path`] should be excluded.
fn is_file_excluded(path: &Path, resolver: &Resolver) -> bool {
    // TODO(charlie): Respect gitignore.
    for path in path.ancestors() {
        let settings = resolver.resolve(path);
        if let Some(file_name) = path.file_name() {
            let file_path = Candidate::new(path);
            let file_basename = Candidate::new(file_name);
            if match_candidate_exclusion(
                &file_path,
                &file_basename,
                &settings.file_resolver.exclude,
            ) {
                debug!("Ignored path via `exclude`: {path:?}");
                return true;
            } else if match_candidate_exclusion(
                &file_path,
                &file_basename,
                &settings.file_resolver.extend_exclude,
            ) {
                debug!("Ignored path via `extend-exclude`: {path:?}");
                return true;
            }
        } else {
            break;
        }
        if path == settings.file_resolver.project_root {
            // Bail out; we'd end up past the project root on the next iteration
            // (excludes etc. are thus "rooted" to the project).
            break;
        }
    }
    false
}

/// Return `true` if the given file should be ignored based on the exclusion
/// criteria.
#[inline]
pub fn match_exclusion<P: AsRef<Path>, R: AsRef<Path>>(
    file_path: P,
    file_basename: R,
    exclusion: &GlobSet,
) -> bool {
    match_candidate_exclusion(
        &Candidate::new(file_path.as_ref()),
        &Candidate::new(file_basename.as_ref()),
        exclusion,
    )
}

/// Return `true` if the given candidates should be ignored based on the exclusion
/// criteria.
pub fn match_candidate_exclusion(
    file_path: &Candidate,
    file_basename: &Candidate,
    exclusion: &GlobSet,
) -> bool {
    if exclusion.is_empty() {
        return false;
    }
    exclusion.is_match_candidate(file_path) || exclusion.is_match_candidate(file_basename)
}

#[derive(Debug, Copy, Clone)]
pub enum ExclusionKind {
    /// The exclusion came from the `exclude` setting.
    Exclude,
    /// The exclusion came from the `extend-exclude` setting.
    ExtendExclude,
    /// The exclusion came from the `lint.exclude` setting.
    LintExclude,
    /// The exclusion came from the `lint.extend-exclude` setting.
    FormatExclude,
}

impl std::fmt::Display for ExclusionKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ExclusionKind::Exclude => write!(f, "exclude"),
            ExclusionKind::ExtendExclude => write!(f, "extend-exclude"),
            ExclusionKind::LintExclude => write!(f, "lint.exclude"),
            ExclusionKind::FormatExclude => write!(f, "lint.extend-exclude"),
        }
    }
}

/// Return the [`ExclusionKind`] for a given [`Path`], if the path or any of its ancestors match
/// any of the exclusion criteria.
pub fn match_any_exclusion(
    path: &Path,
    resolver_settings: &FileResolverSettings,
    lint_exclude: Option<&GlobSet>,
    format_exclude: Option<&GlobSet>,
) -> Option<ExclusionKind> {
    for path in path.ancestors() {
        if let Some(basename) = path.file_name() {
            let path = Candidate::new(path);
            let basename = Candidate::new(basename);
            if match_candidate_exclusion(&path, &basename, &resolver_settings.exclude) {
                return Some(ExclusionKind::Exclude);
            }
            if match_candidate_exclusion(&path, &basename, &resolver_settings.extend_exclude) {
                return Some(ExclusionKind::ExtendExclude);
            }
            if let Some(lint_exclude) = lint_exclude {
                if match_candidate_exclusion(&path, &basename, lint_exclude) {
                    return Some(ExclusionKind::LintExclude);
                }
            }
            if let Some(format_exclude) = format_exclude {
                if match_candidate_exclusion(&path, &basename, format_exclude) {
                    return Some(ExclusionKind::FormatExclude);
                }
            }
        }
        if path == resolver_settings.project_root {
            // Bail out; we'd end up past the project root on the next iteration
            // (excludes etc. are thus "rooted" to the project).
            break;
        }
    }
    None
}

#[derive(Debug, Copy, Clone)]
pub enum InclusionKind {
    /// The inclusion came from the `include` setting.
    Include,
    /// The inclusion came from the `extend-include` setting.
    ExtendInclude,
}

impl std::fmt::Display for InclusionKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            InclusionKind::Include => write!(f, "include"),
            InclusionKind::ExtendInclude => write!(f, "extend-include"),
        }
    }
}

/// Return the [`InclusionKind`] for a given [`Path`], if the path match any of the inclusion
/// criteria.
pub fn match_any_inclusion(
    path: &Path,
    resolver_settings: &FileResolverSettings,
) -> Option<InclusionKind> {
    if resolver_settings.include.is_match(path) {
        Some(InclusionKind::Include)
    } else if resolver_settings.extend_include.is_match(path) {
        Some(InclusionKind::ExtendInclude)
    } else {
        None
    }
}

#[cfg(test)]
mod tests {
    use std::fs::{File, create_dir};
    use std::path::Path;

    use anyhow::Result;
    use globset::GlobSet;
    use itertools::Itertools;
    use path_absolutize::Absolutize;
    use tempfile::TempDir;

    use ruff_linter::settings::types::{FilePattern, GlobPath};

    use crate::configuration::Configuration;
    use crate::pyproject::find_settings_toml;
    use crate::resolver::{
        ConfigurationOrigin, ConfigurationTransformer, PyprojectConfig, PyprojectDiscoveryStrategy,
        ResolvedFile, Resolver, is_file_excluded, match_exclusion, python_files_in_path,
        resolve_root_settings,
    };
    use crate::settings::Settings;
    use crate::tests::test_resource_path;

    struct NoOpTransformer;

    impl ConfigurationTransformer for NoOpTransformer {
        fn transform(&self, config: Configuration) -> Configuration {
            config
        }
    }

    #[test]
    fn rooted_exclusion() -> Result<()> {
        let package_root = test_resource_path("package");
        let pyproject_config = PyprojectConfig::new(
            PyprojectDiscoveryStrategy::Hierarchical,
            resolve_root_settings(
                &find_settings_toml(&package_root)?.unwrap(),
                &NoOpTransformer,
                ConfigurationOrigin::Ancestor,
            )?,
            None,
        );
        let resolver = Resolver::new(&pyproject_config);
        // src/app.py should not be excluded even if it lives in a hierarchy that should
        // be excluded by virtue of the pyproject.toml having `resources/*` in
        // it.
        assert!(!is_file_excluded(
            &package_root.join("src/app.py"),
            &resolver,
        ));
        // However, resources/ignored.py should be ignored, since that `resources` is
        // beneath the package root.
        assert!(is_file_excluded(
            &package_root.join("resources/ignored.py"),
            &resolver,
        ));
        Ok(())
    }

    #[test]
    fn find_python_files() -> Result<()> {
        // Initialize the filesystem:
        //   root
        //   ├── file1.py
        //   ├── dir1.py
        //   │   └── file2.py
        //   └── dir2.py
        let tmp_dir = TempDir::new()?;
        let root = tmp_dir.path();
        let file1 = root.join("file1.py");
        let dir1 = root.join("dir1.py");
        let file2 = dir1.join("file2.py");
        let dir2 = root.join("dir2.py");
        File::create(&file1)?;
        create_dir(dir1)?;
        File::create(&file2)?;
        create_dir(dir2)?;

        let (paths, _) = python_files_in_path(
            &[root.to_path_buf()],
            &PyprojectConfig::new(PyprojectDiscoveryStrategy::Fixed, Settings::default(), None),
            &NoOpTransformer,
        )?;
        let paths = paths
            .into_iter()
            .flatten()
            .map(ResolvedFile::into_path)
            .sorted()
            .collect::<Vec<_>>();
        assert_eq!(paths, [file2, file1]);

        Ok(())
    }

    fn make_exclusion(file_pattern: FilePattern) -> GlobSet {
        let mut builder = globset::GlobSetBuilder::new();
        file_pattern.add_to(&mut builder).unwrap();
        builder.build().unwrap()
    }

    #[test]
    fn exclusions() {
        let project_root = Path::new("/tmp/");

        let path = Path::new("foo").absolutize_from(project_root).unwrap();
        let exclude =
            FilePattern::User("foo".to_string(), GlobPath::normalize("foo", project_root));
        let file_path = &path;
        let file_basename = path.file_name().unwrap();
        assert!(match_exclusion(
            file_path,
            file_basename,
            &make_exclusion(exclude),
        ));

        let path = Path::new("foo/bar").absolutize_from(project_root).unwrap();
        let exclude =
            FilePattern::User("bar".to_string(), GlobPath::normalize("bar", project_root));
        let file_path = &path;
        let file_basename = path.file_name().unwrap();
        assert!(match_exclusion(
            file_path,
            file_basename,
            &make_exclusion(exclude),
        ));

        let path = Path::new("foo/bar/baz.py")
            .absolutize_from(project_root)
            .unwrap();
        let exclude = FilePattern::User(
            "baz.py".to_string(),
            GlobPath::normalize("baz.py", project_root),
        );
        let file_path = &path;
        let file_basename = path.file_name().unwrap();
        assert!(match_exclusion(
            file_path,
            file_basename,
            &make_exclusion(exclude),
        ));

        let path = Path::new("foo/bar").absolutize_from(project_root).unwrap();
        let exclude = FilePattern::User(
            "foo/bar".to_string(),
            GlobPath::normalize("foo/bar", project_root),
        );
        let file_path = &path;
        let file_basename = path.file_name().unwrap();
        assert!(match_exclusion(
            file_path,
            file_basename,
            &make_exclusion(exclude),
        ));

        let path = Path::new("foo/bar/baz.py")
            .absolutize_from(project_root)
            .unwrap();
        let exclude = FilePattern::User(
            "foo/bar/baz.py".to_string(),
            GlobPath::normalize("foo/bar/baz.py", project_root),
        );
        let file_path = &path;
        let file_basename = path.file_name().unwrap();
        assert!(match_exclusion(
            file_path,
            file_basename,
            &make_exclusion(exclude),
        ));

        let path = Path::new("foo/bar/baz.py")
            .absolutize_from(project_root)
            .unwrap();
        let exclude = FilePattern::User(
            "foo/bar/*.py".to_string(),
            GlobPath::normalize("foo/bar/*.py", project_root),
        );
        let file_path = &path;
        let file_basename = path.file_name().unwrap();
        assert!(match_exclusion(
            file_path,
            file_basename,
            &make_exclusion(exclude),
        ));

        let path = Path::new("foo/bar/baz.py")
            .absolutize_from(project_root)
            .unwrap();
        let exclude =
            FilePattern::User("baz".to_string(), GlobPath::normalize("baz", project_root));
        let file_path = &path;
        let file_basename = path.file_name().unwrap();
        assert!(!match_exclusion(
            file_path,
            file_basename,
            &make_exclusion(exclude),
        ));
    }
}
